---
title: "Table A.7"
output: 
---

# Table A.7

# Who's to Blame? Postconflict Violence and Public Attitudes Towards Peace Agreements
# Wyer, Frank. 

#clear environment
```{r clear environment}
rm(list = ls())
```

# uncomment and set working directory to replication archive
# setwd("~/blame_replication")

# Uncomment to install packages if necessary
# install.packages("tidyverse")
# install.packages("readxl")
# install.packages("estimatr")
# install.packages("texreg")


#load packages
```{r}
library(tidyverse)
library(readxl)
library(estimatr)
library(texreg)
```

#read in survey data
```{r}
survey_clean <- read.csv("survey_clean.csv")
```

#read in census population data
```{r}
census_df <- read_xlsx("Raw Data Files/census_data_DANE.xlsx", skip = 11)
```

#clean census data
```{r clean census}
census_pop_22 <- census_df %>% filter(AÑO == 2022 & `ÁREA GEOGRÁFICA` != "Total") %>% dplyr::select(!contains("Total")) %>% pivot_longer(Hombres_0:`Mujeres_100 y más`, names_to = "demo", values_to = "pop") %>% separate_wider_delim(demo, names = c("gender", "age"), delim = "_") %>% rename(year = AÑO, areatype = `ÁREA GEOGRÁFICA`) %>% mutate(age = as.numeric(str_remove(age, " y más"))) %>% filter(age > 17) #limit to adult population in 2022
```

#generate population categories to match survey categories
```{r}
census_pop_22 <- census_pop_22 %>% mutate(
age_cat = case_when( #variable for age categories
age < 26 ~ 1, 
age > 25 & age < 36 ~ 2,
age > 35 & age < 46 ~ 3,
age > 45 & age < 56 ~ 4,
age > 55 & age < 66 ~ 5,
age > 65 ~ 6
),
area_cat = case_when( #variable for urban center or rural periphery
areatype == "Cabecera" ~ 1,
areatype == "Centros Poblados y Rural Disperso" ~ 2
),
sex_cat = case_when( #variable for male or female
gender == "Hombres" ~ 1,
gender == "Mujeres" ~ 2
)
)
```

#aggregate to strata and calculate proportions in population
```{r}
totpop22 <- census_pop_22 %>% ungroup() %>% summarise(totpop = sum(pop)) %>% as_vector() #total adult population

census_pop_22 <- census_pop_22 %>% group_by(area_cat, age_cat, sex_cat) %>% summarise(stratapop = sum(pop), strataprop = stratapop/totpop22) #proportion of population within each strata
```

#calculate proportion of respondents within each strata
```{r}
target_marginals <- c("age_cat", "area_cat", "sex_cat")

survey_marginals <- survey_clean %>%
    group_by(across(all_of(target_marginals))) %>%
    summarize(survey_prop = n() / nrow(survey_clean))  
  
merged_marginals <- left_join(survey_marginals, census_pop_22, by = target_marginals)
```

#generate weights and add weights to survey data
```{r}
weight_adjustments <- merged_marginals %>%
    mutate(adjustment = strataprop / survey_prop)

survey_clean <- survey_clean %>%
    left_join(weight_adjustments, by = target_marginals) %>%
    mutate(weight = adjustment / sum(adjustment)) 
```

#estimate weighted PATE models
```{r weighted models}
eln_pate <- lm_lin(formula = eln_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, weights = weight, alpha = .05)

accords_pate <- lm_lin(formula = accords_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, weights = weight, alpha = .05)

dissident_pate <- lm_lin(dissident_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, weights = weight, alpha = .05)

index_pate <- lm_lin(outcomes_zscale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, weights = weight, alpha = .05)
```

#estimate unweighted SATE models
```{r unweighted models}
eln_sate <- lm_lin(formula = eln_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, alpha = .05)

accords_sate <- lm_lin(formula = accords_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, alpha = .05)

dissident_sate <- lm_lin(dissident_scale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, alpha = .05)

index_sate <- lm_lin(outcomes_zscale ~ treatment, covariates = ~ Q15 + Q25 + urbandummy + engage_zscale + farc_presence + homratediff + factor(regionname), se_type = "HC2", data = survey_clean, alpha = .05)
```

#generate table for results of weighted and unweighted treatment effects
```{r results table}
texreg(list(accords_sate, eln_sate, dissident_sate, index_sate, accords_pate, eln_pate, dissident_pate, index_pate), custom.coef.map = list("(Intercept)" = "(Intercept)", "treatmentT1" = "Postconflict Violence Treatment", "treatmentT2A" = "Government Culpability Treatment", "treatmentT2B" = "Rebel Culpability Treatment"), digits = 2, include.ci = FALSE, single.row = FALSE, include.fstatistic = FALSE, include.rmse = FALSE, include.rsquared = FALSE, include.adjrs = FALSE, include.nobs = TRUE, stars = numeric(0), float.pos = "h", caption.above	= TRUE, caption = "Comparing Weighted and Unweighted Treatment Effects")
```